#--------------------------------------------------------------------------------------------------------------------#
#----------------------------------------------- Portmann & Stojanovic ----------------------------------------------#
#--------------------------------------------------- November 2020 --------------------------------------------------#
#------- Are Immigrant-Origin Candidates Penalized in Virtue of Ingroup Favoritism or Outgroup Hostility? -----------#
#--------------------------------------------------------------------------------------------------------------------#
#-------------------------------------------------- Build variables -------------------------------------------------#
#--------------------------------------------------------------------------------------------------------------------#

# Start from an empty workspace, switch to the project directory and load the
# prepared data (the rest of the script expects it to provide `d_nr15`).
# NOTE(review): ".../..." is a placeholder path and has to be adapted locally.
# NOTE(review): mapvalues(), describe(), %>% and the dplyr verbs used below are
# never attached in this file -- presumably a calling script loads the
# packages; verify before running this file on its own.
rm(list = ls())
setwd(dir = ".../...")
load(file = "d_nr15")

# ------------------------------------------------------------------ Build independent variables

# ----- Cand_id

#List identifier: canton number * 100 + list code
d_nr15$list_id <- with(d_nr15, bfscantno.x * 100 + bfslistcode)
#Candidate identifier: list identifier * 100 + candidate number
d_nr15$cand_id <- with(d_nr15, list_id * 100 + candidate_number)
describe(d_nr15$cand_id)

# ----- Non-Swiss name

#Swiss-name code of the name used for coding: the second name for candidates
#with a double name (presumably the maiden name), the first name otherwise
d_nr15$nS_name_base <- with(
  d_nr15,
  ifelse(bfsnamdoubl == 1, origname2_swissname, origname1_swissname)
)
table(d_nr15$nS_name_base)
describe(d_nr15$nS_name_base)

#Non-Swiss name dummy: 1 if the base code is 0, 0 if it is 1 or 2, NA otherwise
d_nr15$nonswissname <- with(
  d_nr15,
  ifelse(nS_name_base == 0, 1,
         ifelse(nS_name_base == 1 | nS_name_base == 2, 0, NA))
)

# ----- Name origin

#Name-origin code of the name used for coding (second name for double names)
d_nr15$origin_name_base <- with(
  d_nr15,
  ifelse(bfsnamdoubl == 1, origname2_code, origname1_code)
)
table(d_nr15$origin_name_base)

#Name origin in 3 categories
#(1=Swiss name; 2=non-Swiss, Western name; 3=non-Swiss, non-Western name)
d_nr15$origin_name_c3 <- mapvalues(
  d_nr15$origin_name_base,
  from = c(1, 2, 3, 4, 7, 5, 6, 8, 9, 10, 11, 12, 13, 99),
  to   = c(1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3)
)

#Recode Greek cases from Southern Europe/Italy (4) to 7 (Western European) for the variable with 8 categories.
#Index assignment through which() is used instead of ifelse(): with ifelse(), a row with code 4 and a
#missing origname1_txt evaluated to NA and silently lost its origin code. Such rows now keep their code.
#NOTE(review): the country text is always read from origname1_txt, even where origin_name_base was taken
#from the second name (bfsnamdoubl == 1) -- confirm that this is intended.
d_nr15$origin_name_base[which(d_nr15$origin_name_base == 4 & d_nr15$origname1_txt == "Greece")] <- 7

#Recode Bangladesh from Southeastern/Eastern Asian (9) to Southern Asian (8)
d_nr15$origin_name_base[which(d_nr15$origin_name_base == 9 & d_nr15$origname1_txt == "Bangladesh")] <- 8

#Name origin 8 categories
#Collapse the (recoded) origin codes into 8 groups, numbered 0 to 7
d_nr15$origin_name_c8 <- mapvalues(
  d_nr15$origin_name_base,
  from = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 99),
  to   = c(0, 4, 5, 2, 1, 1, 5, 6, 6, 3, 6, 3, 7, 7)
)
table(d_nr15$origin_name_c8)

#Turn the group numbers into a labelled factor
d_nr15$origin_name_c8 <- factor(
  d_nr15$origin_name_c8,
  levels = 0:7,
  labels = c(
    "Swiss",
    "Former Yugoslav&Albanian",
    "Italy",
    "Turkey&Arabs",
    "Hispanics",
    "Other European countries",
    "Asia",
    "Other non-European countries"
  )
)

#Build dummies
#One 0/1 indicator per origin group (NA where origin_name_c8 is missing)
d_nr15$origin_YugAlb <- with(d_nr15, as.numeric(origin_name_c8 == "Former Yugoslav&Albanian"))
table(d_nr15$origin_YugAlb)

d_nr15$origin_Italy <- with(d_nr15, as.numeric(origin_name_c8 == "Italy"))
table(d_nr15$origin_Italy)

d_nr15$origin_TurkArab <- with(d_nr15, as.numeric(origin_name_c8 == "Turkey&Arabs"))
table(d_nr15$origin_TurkArab)

d_nr15$origin_Hispanic <- with(d_nr15, as.numeric(origin_name_c8 == "Hispanics"))
table(d_nr15$origin_Hispanic)

d_nr15$origin_OtherEurope <- with(d_nr15, as.numeric(origin_name_c8 == "Other European countries"))
table(d_nr15$origin_OtherEurope)

#"Asia" is merged into the other non-European group for this indicator
d_nr15$origin_OtherNonEU <- with(
  d_nr15,
  as.numeric(origin_name_c8 == "Other non-European countries" | origin_name_c8 == "Asia")
)
table(d_nr15$origin_OtherNonEU)

# ----- Variable non-Swiss name with cantonalname: cantonal name (0), Swiss name but not cantonal name (1), 
#non-Swiss and non-cantonal name (2)

#Cantonal-name code of the name used for coding (second name for double names)
d_nr15$nC_name_base <- with(
  d_nr15,
  ifelse(bfsnamdoubl == 1, origname2_cantonalname, origname1_cantonalname)
)
table(d_nr15$nC_name_base)

#0 if the cantonal-name code is 1 or 2; otherwise 1 if the Swiss-name code is 1 or 2; otherwise 2
d_nr15$nonswissname_cant <- with(
  d_nr15,
  ifelse(
    nC_name_base == 1 | nC_name_base == 2,
    0,
    ifelse(nS_name_base == 1 | nS_name_base == 2, 1, 2)
  )
)
table(d_nr15$nonswissname_cant)

# ----- Variable non-Swiss name language region: (0) Swiss name, (1) non-Swiss German-speaking, (2) non-Swiss French-speaking, 
#(3) non-Swiss Italian-speaking, (5) non-Swiss other languages

#Language code of the name used for coding (second name for double names)
d_nr15$nSL_name_base <- with(
  d_nr15,
  ifelse(bfsnamdoubl == 1, origname2_lang, origname1_lang)
)
table(d_nr15$nSL_name_base)

#Code 99 is folded into category 5; all other codes are kept
d_nr15$origname_lang <- mapvalues(
  d_nr15$nSL_name_base,
  from = c(0, 1, 2, 3, 5, 99),
  to   = c(0, 1, 2, 3, 5, 5)
)
table(d_nr15$origname_lang)

# ----- Variable non-Swiss name catholic

#Catholic-origin code of the name used for coding (second name for double names)
d_nr15$nSC_name_base <- with(
  d_nr15,
  ifelse(bfsnamdoubl == 1, origname2_cath, origname1_cath)
)
table(d_nr15$nSC_name_base)

#Codes 3 and 99 are folded into category 2; codes 0, 1 and 2 are kept
d_nr15$origname_cath <- mapvalues(
  d_nr15$nSC_name_base,
  from = c(0, 1, 2, 3, 99),
  to   = c(0, 1, 2, 2, 2)
)
table(d_nr15$origname_cath)
table(d_nr15$rel_catholic)

# ----- Variable non-Swiss name 3 categories: (0) Swiss before 1800, (1) Swiss, not before 1800, (2) non-Swiss

#Reverse the Swiss-name base code: 0 -> 2, 1 -> 1, 2 -> 0; anything else (including NA) -> NA
d_nr15$nonswissname_natyr <- with(
  d_nr15,
  ifelse(nS_name_base == 0, 2,
         ifelse(nS_name_base == 1, 1,
                ifelse(nS_name_base == 2, 0, NA)))
)
table(d_nr15$nonswissname_natyr)
 
# ----- Number of candidates on list

#Count the distinct candidates per list; the data stay grouped by list_id afterwards
d_nr15 <- dplyr::mutate(
  dplyr::group_by(d_nr15, list_id),
  num_cand = dplyr::n_distinct(cand_id)
)
table(d_nr15$num_cand)

# ----- District magnitude (https://www.ch.ch/de/wahlen2015/schweizer-parlament/nationalrat/quanti-seggi-spettano-al-vostro-cantone-in-consiglio-naziona/)

#Seats per canton in the 2015 National Council election, keyed by canton number (bfscantno.x).
#Canton numbers that are not listed get NA, as before.
#Canton 19 (Aargau, 16 seats) was absent from the original recode, so its candidates ended up with a
#missing district magnitude; it is added here. The listed seats now sum to 194; the six cantons left
#out each hold a single seat.
seats_by_canton <- c(
  "1" = 35, "2" = 25, "3" = 10, "5" = 4, "9" = 3,
  "10" = 7, "11" = 6, "12" = 5, "13" = 7, "14" = 2,
  "17" = 12, "18" = 5, "19" = 16, "20" = 6, "21" = 8,
  "22" = 18, "23" = 8, "24" = 4, "25" = 11, "26" = 2
)
#Look the canton number up by name; unknown or missing canton numbers yield NA
d_nr15$distmag <- unname(seats_by_canton[as.character(d_nr15$bfscantno.x)])
describe(d_nr15$distmag)

# ----- Relative list position

#Highest list position of each candidate: the second position where one exists
#(bfsposition2 not missing), the first position otherwise
d_nr15$highpos <- with(d_nr15, ifelse(is.na(bfsposition2), bfsposition1, bfsposition2))

#Number of list positions = largest position found on the list
d_nr15 <- dplyr::mutate(
  dplyr::group_by(d_nr15, list_id),
  num_listpos = max(highpos)
)
table(d_nr15$num_listpos)
describe(d_nr15$num_listpos)

#Relative position: the candidate's position (mean of both positions where two exist)
#divided by the number of list positions
d_nr15$rel_pos <- with(
  d_nr15,
  ifelse(is.na(bfsposition2), bfsposition1, (bfsposition1 + bfsposition2) / 2) / num_listpos
)
table(d_nr15$rel_pos)

# ----- Age

#Age in the election year 2015
d_nr15$age <- with(d_nr15, 2015 - bfsyearborn)
describe(d_nr15$age)

# ----- Age2

d_nr15$age2 <- with(d_nr15, age^2)

# ----- Age 3 categories

table(d_nr15$age)

#1 = up to 30, 2 = above 30 and up to 50, 3 = above 50; NA where age is missing
d_nr15$age_c3 <- findInterval(d_nr15$age, c(30, 50), left.open = TRUE) + 1

# ----- Party magnitude

#Recoded party number: party numbers below 35 are kept; for all other rows the list name
#(bfslist_txt) decides which sub-code (35.x) is assigned. Lists of the same party under different
#names share a sub-code. Rows whose list name is not in the table get NA.
#Changes against the previous nested ifelse() version:
# - "Ecopop" now gets 35.1, the code of the ECOPOP list in the first table row; it was coded 35.2,
#   which is the code of "Unabhängigkeitspartei up!".
# - the chain of 48 nested ifelse() calls is replaced by a lookup table (same names, same codes).
#NOTE(review): several list names contain characters that look mis-encoded in this file; they are
#reproduced unchanged here -- confirm that they match the encoding of bfslist_txt in the data.
party_code_by_list <- c(
  "ECOPOP, Lebensqualit�t statt Wachstumszwang" = 35.1,
  "Unabh�ngigkeitspartei up!" = 35.2,
  "Zentrumspartei" = 35.3,
  "Piratenpartei � humanistisch liberal progressiv" = 35.4,
  "Anti-PowerPoint-Partei" = 35.5,
  "Alternative Linke, AL - Alternative Liste" = 35.6,
  "Kunst + Politik" = 35.7,
  "DU Die Unpolitischen" = 35.8,
  "Stopp Stau und Blitzerterror - die Autofahrer Liste" = 35.9,
  "Tierpartei Schweiz (TPS)" = 35.11,
  "Fl�ckiger Hans Ulrich (Hanf Ueli), unabh�ngig" = 35.12,
  "Schweizer Freiheit und Recht" = 35.13,
  "Piraten,  Pirates" = 35.4,
  "Alpenparlament,  Alpenparlament" = 35.14,
  "Philipp Jutzi,  Philipp Jutzi" = 35.15,
  "Parteilose Schweizer" = 35.16,
  "Integrale Politik (IP)" = 35.17,
  "Piratenpartei" = 35.4,
  "Direktdemokratische Partei Schweiz (DPS Kt. SO)" = 35.18,
  "el presidente" = 35.19,
  "Volks-Aktion gegen zuviele Ausl�nder und Asylanten in unserer Heimat (VA) - Liste Ausl�nderstopp" = 35.21,
  "mach-politik.ch" = 35.22,
  "Gr�ne-Unabh�ngige" = 35.23,
  "Alternative Liste Schaffhausen (AL)" = 35.6,
  "Parteifrei SG" = 35.16,
  "IP Integrale Politik" = 35.17,
  "Sarah B�sch das Original" = 35.24,
  "Direktdemokratische Partei Schweiz, Kanton St.Gallen" = 35.18,
  "Piratenpartei SG AI AR" = 35.4,
  "Marcel Giger Amden parteilos" = 35.25,
  "Patriotisch Liberale Demokraten" = 35.26,
  "Direktdemokratische Partei Schweiz (DPS)" = 35.18,
  "I Liberisti" = 35.27,
  "MontagnaViva" = 35.28,
  "Partito Comunista - PC" = 35.29,
  "Lega Sud" = 35.39,
  "Liste du Vote Blanc (LVB)" = 35.31,
  "PNS - Parti Nationaliste Suisse" = 35.32,
  "Les Ind�pendants Vaudois" = 35.33,
  "Ecopop" = 35.1,
  "Parti Pirate vaudois" = 35.4,
  "Graines de Futur" = 35.34,
  "Nouveau Parti Lib�ral (NPL)" = 35.35,
  "Liste du vote blanc" = 35.31,
  "Mouvement D�mocratique Cadmos" = 35.36,
  "impossible alternative (ia)" = 35.37,
  "Politique Int�grale, dans l'intelligence du c�ur" = 35.17,
  "Rauraque du Nord" = 35.38
)

#as.character() keeps the lookup by name correct even if bfslist_txt is stored as a factor
d_nr15$bfspartyno_rec <- ifelse(
  d_nr15$bfspartyno < 35,
  d_nr15$bfspartyno,
  unname(party_code_by_list[as.character(d_nr15$bfslist_txt)])
)
describe(d_nr15$bfspartyno_rec)

#Generate variable party magnitude
#Number of distinct candidates per canton and (recoded) party; the data stay grouped by
#bfscantno.x and bfspartyno_rec afterwards
d_nr15 <- dplyr::mutate(
  dplyr::group_by(d_nr15, bfscantno.x, bfspartyno_rec),
  partymag = dplyr::n_distinct(cand_id)
)
describe(d_nr15$partymag)

# ----- Party position

#Map every recoded party number to a position code between 1 and 4 (4 is the group dropped later
#as not classifiable on the left-right axis). Each row of `from` is aligned with the row of `to`
#at the same place; party numbers that are not listed are left unchanged by mapvalues().
#NOTE(review): for three sub-codes this mapping differs from the coding notes that follow this
#statement in the file: 35.22 (mach-politik.ch) is mapped to 1 but noted as "others", 35.23
#(Grüne-Unabhängige) to 4 but noted as "left", 35.26 (Patriotisch Liberale Demokraten) to 4 but
#noted as right. The mapping is kept as it was -- confirm which of the two is intended.
d_nr15$partypos <- mapvalues(
  d_nr15$bfspartyno_rec,
  from = c(
    1, 2, 3, 4, 7, 8, 9, 13, 15, 16, 18, 27, 31, 32, 33,
    35.1, 35.2, 35.3, 35.4, 35.5, 35.6, 35.7, 35.8, 35.9,
    35.11, 35.12, 35.13, 35.14, 35.15, 35.16, 35.17, 35.18, 35.19,
    35.21, 35.22, 35.23, 35.24, 35.25, 35.26, 35.27, 35.28, 35.29,
    35.39, 35.31, 35.32, 35.33, 35.34, 35.35, 35.36, 35.37, 35.38
  ),
  to = c(
    1, 2, 3, 1, 2, 2, 3, 3, 1, 1, 1, 3, 2, 2, 1,
    4, 4, 1, 4, 4, 3, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 1, 4,
    1, 1, 4, 4, 4, 4, 4, 4, 3,
    1, 4, 1, 4, 4, 4, 4, 4, 4
  )
)

#Coding of parties:

#Main parties: 
#RIGHT: fdp, svp, sd, edu
#CENTER: glp, bdp, cvp, evp
#LEFT: sp, gps, PdA

#Coding of smaller parties
#Ecopop: others; Unabhängigkeitspartei up!: others; Zentrumspartei: right; Piratenpartei: others; 
#Anti-PowerPoint-Partei: others; Alternative Liste: left; Kunst + Politik: others;
#DU Die Unpolitischen: others; Stopp Stau und Blitzerterror - die Autofahrer Liste: others;
#Tierpartei Schweiz (TPS): others; Flückiger Hans Ulrich (Hanf Ueli), unabhängig: others;
#Schweizer Freiheit und Recht: others; Alpenparlament: others (could also be classified as a right-wing party);
#Philipp Jutzi: others; Parteilose Schweizer: others; Integrale Politik (IP): others (possibly also left);
#Direktdemokratische Partei Schweiz: right; el presidente: others; 
#Volks-Aktion gegen zuviele Ausländer und Asylanten in unserer Heimat (VA): RIGHT;
#mach-politik.ch: others; Grüne-Unabhängige: left; Sarah Bösch das Original: others;
#Marcel Giger Amden parteilos: others; Patriotisch Liberale Demokraten: right; 
#I Liberisti: others; MontagnaViva: OTHERS?; Partito Comunista: LEFT; Lega Sud: RIGHT;
#Liste du Vote Blanc: others; Parti Nationaliste Suisse: right; Les Indépendants Vaudois: others;
#Graines de Futur: others; Nouveau Parti Libéral: others; Mouvement Démocratique Cadmos: others;
#impossible alternative: others; Rauraque du Nord: others

# ----- Precumulated

#1 if the candidate has a second list position (bfsposition2 not missing), 0 otherwise
d_nr15$precumulated <- as.numeric(!is.na(d_nr15$bfsposition2))
table(d_nr15$precumulated)

# ----- Gender

#Gender is used as delivered (bfssex); only its distribution is inspected
table(d_nr15$bfssex)

# ----- Incumbent

#Variable: bfsincumbency

# ----- Generate variable profession

#Convert the occupation code to numeric via character (presumably it is stored as factor or
#character -- TODO confirm)
d_nr15$profcode1_l2_b <- as.numeric(as.character(d_nr15$profcode1_l2_b))
#Occupation in three categories; 0 is the starting value and remains for every row that none of
#the assignments below touches (including rows with a missing code)
d_nr15$prof_c3_b <- 0
#Range-based coding: codes up to 24 -> 1, codes above 24 and below 60 -> 2, codes above 60 -> 3
#NOTE(review): a code of exactly 60 matches neither "< 60" nor "> 60" and therefore stays 0 --
#confirm that this is intended
d_nr15$prof_c3_b[d_nr15$profcode1_l2_b <= 24] <- 1
d_nr15$prof_c3_b[d_nr15$profcode1_l2_b > 24 & d_nr15$profcode1_l2_b < 60] <- 2
d_nr15$prof_c3_b[d_nr15$profcode1_l2_b > 60] <- 3
#Single-code exceptions; they must run after the range-based coding because they override it
d_nr15$prof_c3_b[d_nr15$profcode1_l2_b == 33] <- 3
d_nr15$prof_c3_b[d_nr15$profcode1_l2_b == 95] <- 2
#Cross-check of original codes against the three categories
table(d_nr15$profcode1_l2_b, d_nr15$prof_c3_b)
describe(d_nr15$prof_c3_b)

# ------------------------------------------------------------------ Build context variables

# ----- Naturalizations in relation to population size

#Origin groups with a count column ogn_<group>. The order fixes the order in which the
#prop_ogn_<group> columns are created and in which the counts are summed.
ogn_groups <- c(
  "Italy", "France", "Germany", "Portugal", "Spain",
  "OtherSouthernEurope", "EasternEuropean", "OtherWesternEuropean",
  "Yugoslav", "Albanian", "OtherHispanic", "Indian", "EasternAsian",
  "CentralAsian", "Turkish", "OtherArabic_Maghreb", "OtherArabic_MiddleEast",
  "OtherAfrican", "StatelessUnbekannt_others"
)

#Share per origin group: count divided by total_bev
for (grp in ogn_groups) {
  d_nr15[[paste0("prop_ogn_", grp)]] <- d_nr15[[paste0("ogn_", grp)]] / d_nr15[["total_bev"]]
}

#Share over all origin groups together: sum of all counts divided by total_bev
ogn_total <- Reduce(`+`, lapply(paste0("ogn_", ogn_groups), function(col) d_nr15[[col]]))
d_nr15$prop_ogn_foreign_tot <- ogn_total / d_nr15$total_bev

#Remove the helper objects so that the workspace holds the same objects as before
rm(ogn_groups, grp, ogn_total)

# ----- Naturalizations in relation to population size, separated for migrant groups

#Shares per migrant group: sums of the origin-specific shares (prop_ogn_*)
d_nr15$prop_ogn_YugAlb <- d_nr15$prop_ogn_Yugoslav + d_nr15$prop_ogn_Albanian
#Italy forms a group of its own, so prop_ogn_Italy is used as it is. (A bare expression
#`d_nr15$prop_ogn_Italy` used to stand here; it computed nothing and only printed the whole
#column to the console, so it was removed.)
d_nr15$prop_ogn_TurkArab <- d_nr15$prop_ogn_Turkish + d_nr15$prop_ogn_OtherArabic_Maghreb +
  d_nr15$prop_ogn_OtherArabic_MiddleEast
d_nr15$prop_ogn_Hispanic <- d_nr15$prop_ogn_OtherHispanic + d_nr15$prop_ogn_Portugal +
  d_nr15$prop_ogn_Spain
d_nr15$prop_ogn_OtherEurope <- d_nr15$prop_ogn_France + d_nr15$prop_ogn_Germany +
  d_nr15$prop_ogn_OtherSouthernEurope + d_nr15$prop_ogn_EasternEuropean +
  d_nr15$prop_ogn_OtherWesternEuropean
d_nr15$prop_ogn_OtherNonEU <- d_nr15$prop_ogn_OtherAfrican + d_nr15$prop_ogn_StatelessUnbekannt_others +
  d_nr15$prop_ogn_Indian + d_nr15$prop_ogn_EasternAsian +
  d_nr15$prop_ogn_CentralAsian

# ----- Indicator for conservative attitudes from federal initiatives

#Inspect the available columns and four of the initiative items used for the indicator
names(d_nr15)
describe(d_nr15$i_RegZu)
describe(d_nr15$i_EinbInit)
describe(d_nr15$i_Minarette)
describe(d_nr15$i_AusInit)
#describe(d_nr15$Massenweinw)

#Recode initiative "für demokratische Einbürgerungen"
#The item is reversed (100 - value); presumably it is a percentage whose direction has to be
#flipped to match the other items -- TODO confirm. Running this line a second time would
#restore the original values.

d_nr15$i_EinbInit <- 100 - d_nr15$i_EinbInit
#TODO: check again whether this recoding is needed!

# ----- Build factor

#One-factor factor analysis (no rotation, regression scores) on the five initiative items
#NOTE(review): i_Massenweinw enters the model but is not among the items described beforehand
fit <- factanal(x = d_nr15[, c("i_RegZu", "i_EinbInit", "i_Minarette", 
"i_AusInit", "i_Massenweinw")], factors = 1, scores = c("regression"), rotation = "none")

print(fit, digits=2, cutoff=.3, sort=TRUE)
#All factor loadings > 0.85; Variables seem to measure similar construct.

#Add factor scores to data
#The scores are attached by joining on the row names of both objects.
#NOTE(review): this assumes that the row names of the score table and of d_nr15 refer to the
#same rows -- confirm that the rows are identical in number and order.
fs <- data.frame(fit$scores)
d_nr15$rowname <- rownames(d_nr15)
fs$rowname   <- rownames(fs)

d_nr15 <- left_join(d_nr15, fs, by = "rowname")

#Rename the score column (expected to be called "Factor1") to dd_cons
names(d_nr15)[names(d_nr15)=="Factor1"] <- "dd_cons"

# ----- Raumgliederung 

#The following variables serve as controls at the municipal level: bfsan_sprache, bfsraum_groesse, 
#bfsraum_staedtisch, bfsraum_staedte, bfsraum_degurba

# ----- Prepare data "Gemeindeporträt" (municipality portrait)

#Sectors

#Employment counts by sector and in total, converted to numeric
#NOTE(review): if one of these columns is a factor, as.numeric() returns the level codes rather
#than the values -- confirm that the columns are character or numeric
d_nr15[["empl_1"]] <- as.numeric(d_nr15[["empl_1"]])
d_nr15[["empl_2"]] <- as.numeric(d_nr15[["empl_2"]])
d_nr15[["empl_3"]] <- as.numeric(d_nr15[["empl_3"]])
d_nr15[["empl_tot"]] <- as.numeric(d_nr15[["empl_tot"]])

#Share of each sector in total employment
d_nr15$empl_1_perc <- with(d_nr15, empl_1 / empl_tot)
d_nr15$empl_2_perc <- with(d_nr15, empl_2 / empl_tot)
d_nr15$empl_3_perc <- with(d_nr15, empl_3 / empl_tot)

#Social aid: soc_aid

#Crime rate: crime_stgb, crime_betmg, crime_aug; compute the mean of the three as total

d_nr15$crime_tot <- with(d_nr15, (crime_stgb + crime_betmg + crime_aug) / 3)
describe(d_nr15$crime_tot)

#Vote Share SVP: party_mun_svp.

# ------------------------------------------------------------------ Build dependent variable

# ----- Negative preference votes

# Reference: modified ballot

#Removed preferences per modified ballot; NA where the number of modified ballots is 0
d_nr15$n_pv <- with(
  d_nr15,
  ifelse(bfsballots_changed == 0, NA, removed_preferences / bfsballots_changed)
)
#Halved version of the measure
d_nr15$n_pv_rc <- d_nr15$n_pv / 2
describe(d_nr15$n_pv_rc)

# Reference: Average number of negative preference votes candidates on party list

#Placeholder first, so that npv_rel keeps its place ahead of mean_npv_list in the column order
d_nr15$npv_rel <- NA
#Mean number of removed preferences per list and municipality
d_nr15 <- dplyr::mutate(
  dplyr::group_by(d_nr15, list_id, bfsresidence_code),
  mean_npv_list = mean(removed_preferences)
)

#Removed preferences relative to that mean; NA where the mean is 0
d_nr15$npv_rel <- with(
  d_nr15,
  ifelse(mean_npv_list == 0, NA, removed_preferences / mean_npv_list)
)
describe(d_nr15$npv_rel)

# ----- Cumulation

# Reference: modified ballots

#Preferences added from the same list per modified ballot; set to NA for cantons 21, 24 and 25
d_nr15$cum <- with(
  d_nr15,
  ifelse(
    bfscantno.x == 21 | bfscantno.x == 24 | bfscantno.x == 25,
    NA,
    added_preferences_from_same_list / bfsballots_changed
  )
)
#Data missing for Ticino, Geneva, and Neuchatel

#Halved version of the measure
d_nr15$cum_rc <- d_nr15$cum / 2
describe(d_nr15$cum)

# Reference: Average number of cumulations candidates on party list

#Placeholder first, so that cum_rel keeps its place ahead of mean_cum_list in the column order
d_nr15$cum_rel <- NA
#Mean number of preferences added from the same list, per list and municipality
d_nr15 <- dplyr::mutate(
  dplyr::group_by(d_nr15, list_id, bfsresidence_code),
  mean_cum_list = mean(added_preferences_from_same_list)
)

#No reference value for cantons 21, 24 and 25
d_nr15$mean_cum_list <- with(
  d_nr15,
  ifelse(bfscantno.x == 21 | bfscantno.x == 24 | bfscantno.x == 25, NA, mean_cum_list)
)
#Preferences added from the same list relative to the list mean; NA where the mean is 0
d_nr15$cum_rel <- with(
  d_nr15,
  ifelse(mean_cum_list == 0, NA, added_preferences_from_same_list / mean_cum_list)
)
describe(d_nr15$cum_rel)

# ----- Panachage

#Variable 1: Average votes as a reference
#Make sure the canton number and the list code are numeric
d_nr15[["bfscantno.y"]] <- as.numeric(d_nr15[["bfscantno.y"]])
d_nr15[["bfslistcode"]] <- as.numeric(d_nr15[["bfslistcode"]])
#Mean number of preferences added from other lists, per list and municipality
d_nr15 <- dplyr::mutate(
  dplyr::group_by(d_nr15, list_id, bfsresidence_code),
  mean_pan_list = mean(added_preferences_from_other_lis)
)

#No reference value for canton 21
d_nr15$mean_pan_list <- with(d_nr15, ifelse(bfscantno.x == 21, NA, mean_pan_list))
#Preferences added from other lists relative to the list mean; NA where the mean is 0
d_nr15$pan_mean <- with(
  d_nr15,
  ifelse(mean_pan_list == 0, NA, added_preferences_from_other_lis / mean_pan_list)
)
describe(d_nr15$pan_mean)

#Variable 2: Modified ballots as a reference 
#Preferences added from other lists per modified ballot: NA where no ballot was modified or
#the canton is 21, 0 where no such preferences were added
d_nr15$pan_mod <- with(
  d_nr15,
  ifelse(
    bfsballots_changed == 0 | bfscantno.x == 21,
    NA,
    ifelse(
      added_preferences_from_other_lis != 0,
      added_preferences_from_other_lis / bfsballots_changed,
      0
    )
  )
)
#Missing values arise here if a candidate received panachage votes but the candidate's own party list
#has never been modified. These cases must be dropped from the analysis.
describe(d_nr15$pan_mod)

# ----- Positive preference votes

#Variable 1: Average positive preference votes as a reference
#Positive preference votes: preferences added from the same list, from other lists and from
#the third source (added_preferences_from_not_heade)
d_nr15$p_pv <- with(
  d_nr15,
  added_preferences_from_same_list + added_preferences_from_other_lis + added_preferences_from_not_heade
)

#Add positive preference votes for ticino, geneva, neuchatel:
#canton 21 takes add_votes_ti, cantons 24 and 25 take add_votes_nege

d_nr15$p_pv <- with(
  d_nr15,
  ifelse(
    bfscantno.x == 21,
    add_votes_ti,
    ifelse(bfscantno.x == 24 | bfscantno.x == 25, add_votes_nege, p_pv)
  )
)
describe(d_nr15$p_pv)

#Mean number of positive preference votes per list and municipality
d_nr15 <- dplyr::mutate(
  dplyr::group_by(d_nr15, list_id, bfsresidence_code),
  mean_ppv_list = mean(p_pv)
)

#Positive preference votes relative to the list mean; NA where the mean is 0
d_nr15$pos_mean <- with(d_nr15, ifelse(mean_ppv_list == 0, NA, p_pv / mean_ppv_list))
describe(d_nr15$pos_mean)

#Variable 2: Modified ballots as a reference
#Positive preference votes per modified ballot: NA where no ballot was modified, 0 where the
#candidate received no positive preference votes
d_nr15$pos_mod <- with(
  d_nr15,
  ifelse(
    bfsballots_changed == 0,
    NA,
    ifelse(p_pv != 0, p_pv / bfsballots_changed, 0)
  )
)
#Missing values arise here if a candidate received panachage votes but the ballot of the candidate's
#own list was never modified. These cases must be left out of the analysis.
describe(d_nr15$pos_mod)

#------------------------------------------------------------------------------------------------
#-------------------------------------------------------------- Generate data set municipal level
#------------------------------------------------------------------------------------------------

#Keep the variables needed for the municipal-level models and store them on disk
d_nr15_gde <- d_nr15[c(
  #dependent variables
  "n_pv_rc", "cum_rc", "pan_mean", "pan_mod", "pos_mean", "pos_mod",
  #identifiers and candidate/list characteristics
  "cand_id", "list_id", "bfscantno.y", "nonswissname", "origin_name_c3", "age2",
  "rel_pos", "age", "partypos", "precumulated", "num_cand", "bfsresidence_code",
  "bfssex", "bfsincumbency",
  #municipal context
  "dd_cons", "bfsan_sprache", "bfsraum_groesse",
  "bfsraum_staedtisch", "bfsraum_staedte", "bfsraum_degurba", "canton.x",
  "num_listpos", "partymag", "rel_catholic",
  #naturalization shares
  "prop_ogn_foreign_tot", "prop_ogn_YugAlb", "prop_ogn_Italy",
  "prop_ogn_TurkArab", "prop_ogn_Hispanic", "prop_ogn_OtherEurope", "prop_ogn_OtherNonEU",
  #alternative name codings
  "nonswissname_cant", "origname_lang", "nonswissname_natyr", "origname_cath",
  #further municipal indicators
  "bev_ausl_perc_2015", "foreign_born_perc_2015", "bev_ausl_change", "foreign_born_change",
  "soc_aid", "crime_stgb", "crime_betmg", "crime_aug", "crime_tot", "party_mun_svp",
  "empl_1_perc", "empl_2_perc", "empl_3_perc",
  #remaining variables
  "npv_rel", "age_c3", "bfsballots_unchanged", "bfsballots_changed", "distmag"
)]

save(d_nr15_gde, file = "d_nr15_gde")
                            

#------------------------------------------------------------------------------------------------
#---------------------------------------------------------- Aggregate data candidate level models
#------------------------------------------------------------------------------------------------

#Keep variables
#Variables carried over to the candidate-level data
d_nr15_cand <- d_nr15[c(
  "removed_preferences", "bfsballots_changed",
  "added_preferences_from_same_list", "added_preferences_from_other_lis",
  "added_preferences_from_not_heade",
  "list_id", "bfscantno.x", "nonswissname", "origin_name_c3", "age2",
  "rel_pos", "age", "partypos", "precumulated", "num_cand", "cand_id",
  "bfssex", "bfsincumbency", "bfslistcode", "num_listpos", "partymag",
  "origin_name_base", "nonswissname_cant", "origname_lang",
  "nonswissname_natyr", "origname_cath", "bfselected", "add_votes_ti",
  "age_c3", "bfsballots_unchanged", "profcode1_l2_b", "prof_c3_b",
  "add_votes_nege", "distmag", "bfspartyno", "bfslist_txt"
)]

#Aggregate data: one row per canton, list, candidate and list name.
#Vote and ballot counts are summed over the rows of a candidate; all other variables are
#averaged (presumably they are constant within a candidate -- TODO confirm).
d_nr15_cand <- dplyr::summarise(
  dplyr::group_by(d_nr15_cand, bfscantno.x, list_id, cand_id, bfslist_txt),
  #counts: sums
  removed_preferences = sum(removed_preferences),
  bfsballots_changed = sum(bfsballots_changed),
  bfsballots_unchanged = sum(bfsballots_unchanged),
  added_preferences_from_same_list = sum(added_preferences_from_same_list),
  added_preferences_from_other_lis = sum(added_preferences_from_other_lis),
  added_preferences_from_not_heade = sum(added_preferences_from_not_heade),
  add_votes_ti = sum(add_votes_ti),
  add_votes_nege = sum(add_votes_nege),
  #candidate and list characteristics: means
  nonswissname = mean(nonswissname),
  origin_name_c3 = mean(origin_name_c3),
  rel_pos = mean(rel_pos),
  age = mean(age),
  age2 = mean(age2),
  partypos = mean(partypos),
  precumulated = mean(precumulated),
  num_cand = mean(num_cand),
  bfssex = mean(bfssex),
  partymag = mean(partymag),
  bfsincumbency = mean(bfsincumbency),
  num_listpos = mean(num_listpos),
  origin_name_base = mean(origin_name_base),
  nonswissname_cant = mean(nonswissname_cant),
  origname_lang = mean(origname_lang),
  nonswissname_natyr = mean(nonswissname_natyr),
  origname_cath = mean(origname_cath),
  bfselected = mean(bfselected),
  age_c3 = mean(age_c3),
  profcode1_l2_b = mean(profcode1_l2_b),
  prof_c3_b = mean(prof_c3_b),
  distmag = mean(distmag),
  bfspartyno = mean(bfspartyno)
)

#---------------------------------------------------------------------Build dependent variable

# ----- Negative preference votes

# Reference: modified ballot

#Remove lists where no candidate has been modified

#Removed preferences per modified ballot; NA where the number of modified ballots is 0
d_nr15_cand$n_pv <- with(
  d_nr15_cand,
  ifelse(bfsballots_changed == 0, NA, removed_preferences / bfsballots_changed)
)
#Halved version of the measure
d_nr15_cand$n_pv_rc <- d_nr15_cand$n_pv / 2
describe(d_nr15_cand$n_pv_rc)

# Reference: Average number of times candidates on list crossed off

#Placeholder first, so that npv_rel keeps its place ahead of mean_npv_list in the column order
d_nr15_cand$npv_rel <- NA
d_nr15_cand <- dplyr::mutate(
  dplyr::group_by(d_nr15_cand, list_id),
  mean_npv_list = mean(removed_preferences)
)

#Removed preferences relative to the list mean; NA where the mean is 0
d_nr15_cand$npv_rel <- with(
  d_nr15_cand,
  ifelse(mean_npv_list == 0, NA, removed_preferences / mean_npv_list)
)
describe(d_nr15_cand$npv_rel)

# ----- Cumulation

#Placeholder first, so that cum_rel keeps its place ahead of mean_cum_list in the column order
d_nr15_cand$cum_rel <- NA
#Mean number of preferences added from the same list, per list
d_nr15_cand <- dplyr::mutate(
  dplyr::group_by(d_nr15_cand, list_id),
  mean_cum_list = mean(added_preferences_from_same_list)
)

#No reference value for cantons 21, 24 and 25
d_nr15_cand$mean_cum_list <- with(
  d_nr15_cand,
  ifelse(bfscantno.x == 21 | bfscantno.x == 24 | bfscantno.x == 25, NA, mean_cum_list)
)
#Preferences added from the same list relative to the list mean; NA where the mean is 0
d_nr15_cand$cum_rel <- with(
  d_nr15_cand,
  ifelse(mean_cum_list == 0, NA, added_preferences_from_same_list / mean_cum_list)
)
describe(d_nr15_cand$cum_rel)

# ----- Panachage

#Mean number of preferences added from other lists, per list
d_nr15_cand <- dplyr::mutate(
  dplyr::group_by(d_nr15_cand, list_id),
  mean_pan_list = mean(added_preferences_from_other_lis)
)

#No reference value for canton 21
d_nr15_cand$mean_pan_list <- with(d_nr15_cand, ifelse(bfscantno.x == 21, NA, mean_pan_list))
#Preferences added from other lists relative to the list mean; NA where the mean is 0
d_nr15_cand$pan_mean <- with(
  d_nr15_cand,
  ifelse(mean_pan_list == 0, NA, added_preferences_from_other_lis / mean_pan_list)
)
describe(d_nr15_cand$pan_mean)

# ----- Positive preference votes

#Positive preference votes: preferences added from the same list, from other lists and from
#the third source (added_preferences_from_not_heade)
d_nr15_cand$p_pv <- with(
  d_nr15_cand,
  added_preferences_from_same_list + added_preferences_from_other_lis + added_preferences_from_not_heade
)
describe(d_nr15_cand$p_pv)

# Add positive preference votes Ticino, Neuchatel, Geneva:
# canton 21 takes add_votes_ti, cantons 24 and 25 take add_votes_nege

d_nr15_cand$p_pv <- with(
  d_nr15_cand,
  ifelse(
    bfscantno.x == 21,
    add_votes_ti,
    ifelse(bfscantno.x == 24 | bfscantno.x == 25, add_votes_nege, p_pv)
  )
)
describe(d_nr15_cand$p_pv)

#Mean number of positive preference votes per list
d_nr15_cand <- dplyr::mutate(
  dplyr::group_by(d_nr15_cand, list_id),
  mean_ppv_list = mean(p_pv)
)

#Positive preference votes relative to the list mean; NA where the mean is 0
d_nr15_cand$pos_mean <- with(d_nr15_cand, ifelse(mean_ppv_list == 0, NA, p_pv / mean_ppv_list))
describe(d_nr15_cand$pos_mean)

save(d_nr15_cand, file = "d_nr15_cand")


# ------------------------------------------------------------------ Data preparation for models

#Build factors

#Categorical variables are converted to factors
cols <- c("nonswissname", "precumulated", "list_id",
          "bfssex","bfscantno.x", "bfsincumbency", "origin_name_c3", "cand_id", 
          "origin_name_base", "nonswissname_cant", "origname_lang", 
          "nonswissname_natyr", "origname_cath", "age_c3", "partypos", "prof_c3_b")
d_nr15_cand[cols] <- lapply(d_nr15_cand[cols], factor)
d_nr15_cand$age <- as.numeric(d_nr15_cand$age)

#Center variables
#scale() returns a one-column matrix with attributes. Only two of the six variables were
#converted back to plain numeric vectors before; now all six are, so that every *_c column has
#the same type. The values themselves are unchanged.

#Centered and divided by the standard deviation (scale() default)
d_nr15_cand$age_c <- as.numeric(scale(d_nr15_cand$age, center = TRUE))
d_nr15_cand$age2_c <- as.numeric(scale(d_nr15_cand$age2, center = TRUE))
d_nr15_cand$num_listpos_c <- as.numeric(scale(d_nr15_cand$num_listpos, center = TRUE))
#Centered only (scale = FALSE)
d_nr15_cand$rel_pos_c <- as.numeric(scale(d_nr15_cand$rel_pos, center = TRUE, scale = FALSE))
d_nr15_cand$partymag_c <- as.numeric(scale(d_nr15_cand$partymag, center = TRUE, scale = FALSE))
d_nr15_cand$distmag_c <- as.numeric(scale(d_nr15_cand$distmag, center = TRUE, scale = FALSE))

#Allocate labels to factor

#Attach labels to the levels 0 and 1 of the non-Swiss name factor
d_nr15_cand$nonswissname <- factor(
  d_nr15_cand$nonswissname,
  levels = c(0, 1),
  labels = c("Swiss name", "Non-Swiss name")
)

#Drop candidates on other lists (not classifiable on the left-right axis)
#which() makes the filter NA-safe: with a logical index, every candidate whose partypos is
#missing came back as a row consisting entirely of NA. Such candidates are now dropped from
#d_nr15_cand_m. The data sets derived from it are unaffected, because the all-NA rows were
#removed there anyway (missing dependent variable).
d_nr15_cand_m <- d_nr15_cand[which(d_nr15_cand$partypos != 4), ]
save(d_nr15_cand_m, file = "d_nr15_cand_m")

#Create data set for models with negative preference votes as dependent variable. Drop cases with missing values DV (party ballot has never been modified, hence there is no 
#reference) & drop variables with missing values
#Keep the rows with a non-missing npv_rel and store them as a plain data frame
d_nr15_cand_n <- as.data.frame(
  d_nr15_cand_m[!is.na(d_nr15_cand_m$npv_rel), ]
)
save(d_nr15_cand_n, file = "d_nr15_cand_n")

#Create data set for models with positive preference votes as dependent variable. Drop cases with missing values DV (party ballot has never been modified, hence there is no
#reference) & drop variables with missing values
#Only rows with a non-missing and strictly positive pos_mean are kept
d_nr15_cand_p <- as.data.frame(
  d_nr15_cand_m[!is.na(d_nr15_cand_m$pos_mean) & d_nr15_cand_m$pos_mean > 0, ]
)
save(d_nr15_cand_p, file = "d_nr15_cand_p")

